/*
This .dofile reproduces the latest World Bank Enterprise Surveys (WBES) Indicators (66 indicators) that were first published 
on July 5, 2024, along with an additional 9 WBES indicators that existed previously but now inform the 
Business Ready (B-READY) report. All the indicators that are used in B-READY are identified as such in the 
indicator labels.

The input-file used is the "New Comprehensive" database that is available on the WBES data portal on the "Combined 
Data" tab, within the zipfile called "StandardizedNew-2006-2025-core4.zip". This "New Comprehensive*.dta" file is 
expected to exist in the same folder where this dofile is placed.

The dofile first reproduces the firm-level indicators (which are also available on the WBES data portal).
In Section 6, this .dofile calculates the economy-level means and medians, as reported on the WBES website.

Last updated on: January 23, 2026
For questions or suggestions, please contact us at: enterprisesurveys@worldbank.org
To cite the WBES, please use the following formulation: Source: World Bank Enterprise Surveys, www.enterprisesurveys.org
The most up-to-date code is available here: https://www.enterprisesurveys.org/en/methodology as part of the Replication of WBES Indicators rubric.
*/

	clear all
	set type double		// new variables are created as doubles unless a type is given
	
**# 1. Define the input file
** The input must be a Stata dataset (.dta) located in the same folder as this do-file.
** The extension previously read ".dt", which made the -use- command in Section 2.1 look for a file that does not exist.
	global data = "New_Comprehensive_January_22_2026.dta"
	
** Today's date formatted as CCYY-NN-DD, used in the name of the saved firm-level output file
	global date: di %tdCCYY-NN-DD date(c(current_date),"DMY")

**# 2.1 Open data (sample==1 keeps only the newest survey for each economy)
	use "$data", clear
	keep if sample == 1

** survey year = last four characters of the survey identifier held in country
	gen year = substr(country, -4, 4)
	destring year, replace

**# 2.2 Handle follow-up survey variables and weights

** prepare country code variable (available in raw country-level data)
** a1 stays missing for every survey that is not listed here
	local survey_ids   Peru2023 Bangladesh2022 Madagascar2022 Indonesia2023 Timor-Leste2021 Iraq2022 Pakistan2022
	local survey_codes 7 18 81 100 107 128 90
	gen a1 = .
	forvalues i = 1/7 {
		local id   : word `i' of `survey_ids'
		local code : word `i' of `survey_codes'
		replace a1 = `code' if country == "`id'"
	}

** Some surveys have two sets of weights as some B-Ready questions were asked during follow-up surveys after interviews had taken place
** Pattern used by every loop below, for the surveys listed in inlist(a1,...):
**   1. overwrite the baseline variable with its follow-up counterpart (suffix _BR)
**   2. create a per-variable weight wmedian_<var> that equals the baseline weight (wmedian),
**      replaced by the follow-up weight (wmedian_BR) for the listed surveys
** a1 codes as assigned in Section 2.2: 7 Peru, 18 Bangladesh, 81 Madagascar, 90 Pakistan, 100 Indonesia, 107 Timor-Leste, 128 Iraq
	rename wt wmedian
	rename wt_BR wmedian_BR
	
** group with follow-ups in a1 = 18,81,100,107,128 (weights switched off for Indonesia when q_version == 2)
		foreach var in o1 o2 o3a o3b n2a2 c37 {
			replace `var' = `var'_BR if inlist(a1,18,81,100,107,128)
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,18,81,100,107,128)
			replace wmedian_`var' = . if a1 == 100 & q_version == 2 // turning off irrelevant weights for Indonesia
		}
** n2a: follow-up values used for a1 = 18,128 only
		foreach var in n2a {
			replace `var' = `var'_BR if inlist(a1,18,128)
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,18,128)
		}
		
** group with follow-ups in a1 = 18,81,107,128
		foreach var in j35a j35b j36 j37 c39 {
			replace `var' = `var'_BR if inlist(a1,18,81,107,128)
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,18,81,107,128)
		}
		
** group with follow-ups in a1 = 18,81,100,128 (note: for this group the Indonesia weights are switched off when q_version == 1, not 2)
		foreach var in ge3 ge7 ge8d {
			replace `var' = `var'_BR if inlist(a1,18,81,100,128) 
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,18,81,100,128)
			replace wmedian_`var' = . if a1 == 100 & q_version == 1 // turning off irrelevant weights for Indonesia
		}

** group with follow-ups in all six surveys a1 = 7,18,81,100,107,128
		foreach var in c42 d32 d33a d33b d34 d35 d39 d40a d40b d41 e31a e31b e32 e33 j31 j42 j43 k32 k33 k35 k36 k37 k38 k40 l35 l36 l37 l38 l40 l41 n11 n12 g32 {
			replace `var' = `var'_BR if inlist(a1,7,18,81,100,107,128)
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,7,18,81,100,107,128)
			replace wmedian_`var' = . if a1 == 100 & q_version == 2 // turning off irrelevant weights for Indonesia	
		}
		 
** group with follow-ups in a1 = 7,18,81,100,128
		foreach var in j38 j39 j40 {
			replace `var' = `var'_BR if inlist(a1,7,18,81,100,128)
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,7,18,81,100,128)
			replace wmedian_`var' = . if a1 == 100 & q_version == 2 // turning off irrelevant weights for Indonesia	
		}
		 
** c152 is harmonised before the next loop because it was collected under different names or in different ways across surveys
		replace c152_BR = c15_BR if inlist(a1,18,128) & c152_BR == . & wmedian_BR != . // this question was implemented somewhat differently in Bangladesh and Iraq
		replace c152 = c15 if inlist(a1,100) // this question was implemented under a different variable name in Indonesia
		replace c152 = c15 if c15 != . & a1 == 90 // Pakistan had this implemented in two parts, manuf and others separately
		
** group with follow-ups in a1 = 7,18,81,107,128, restricted to survey years 2021 onwards
		foreach var in c152 c32 c34 c34b j33 j41 k162 k82 e1 e2b n2l n2k  {
			replace `var' = `var'_BR if inlist(a1,7,18,81,107,128) & year>=2021 
			gen wmedian_`var' = wmedian
			replace wmedian_`var' = wmedian_BR if inlist(a1,7,18,81,107,128) & year>=2021
		}
		replace c152 = c15_ESBR if c152 == . & wmedian_BR !=. & inlist(a1,81,107) // this question was implemented differently in Madagascar and Timor-Leste
				
** e2b for a1 = 81,107 is taken from e2b_ESBR, overriding the e2b_BR value assigned in the loop above
		foreach var in e2b {
			replace `var' = `var'_ESBR if inlist(a1,81,107) & year>=2021 // applicable for Madagascar and Timor-Leste only
		}	
		
** handle weights 
** one representative per-variable weight from each group above is renamed to a group-level weight
	rename wmedian_j31 wt_6_followups   // a1: 7,18,81,100,107,128  : 26,793 
	rename wmedian_j40 wt_5a_followups  // a1: 7,18,81,100,128      : 26,865 
	rename wmedian_e2b wt_5b_followups  // a1: 7,18,81,107,128      : 28,305 
	rename wmedian_o3a wt_5c_followups  // a1: 18,81,100,107,128    : 26,950 
	rename wmedian_j35a wt_4a_followups  // a1: 18,81,107,128        : 28,462 
	rename wmedian_ge3 wt_4b_followups  // a1: 18,81,100,128         
	
** if a group weight is missing for every observation of a survey, fall back to the baseline weight for that survey
	foreach var in wt_6_followups wt_5a_followups wt_5b_followups wt_5c_followups wt_4a_followups wt_4b_followups {
		egen temp_ = max(`var'), by(country)
		replace `var' = wmedian if temp_ == .  // for non-follow up surveys, replaces original weights
		drop temp_
	}	

**# 2.3 data transformation for compatibility across WBES questionnaire 
** The statements in Sections 2.3 and 2.4 are order-dependent: several of them read values written by an earlier line
	replace k36 = 0 if inlist(k36,-5,-6) // added by Nona on September 25, 2025 in consultation with Cyriane, to keep the round-3 version of the WBES questionnaire consistent with the previous rounds
	replace k40 = 0 if inlist(k40,-5,-6) // added by Nona on September 25, 2025 in consultation with Cyriane, to keep the round-3 version of the WBES questionnaire consistent with the previous rounds
	
	replace d35 = . if d352 != . // this and the next line added on October 13, 2025, in consultation with Jorge and David, to ensure consistency of sector coverage across countries, as this edit was introduced in the 2025 questionnaire; this line removes the mapping implemented in the new-comprehensive
	replace d352 = . if d1a1a == 2 // added by Nona on October 13, 2025, in consultation with Jorge and David, to ensure consistency of sector coverage across countries, as this edit was introduced in the 2025 questionnaire
	replace d35 = d352 if d352 != . & d35 == . // added by Nona on September 25, 2025 in consultation with Nuno, to keep the round-3 version of the WBES questionnaire consistent with the previous rounds
	
	
**# 2.4 Some data cleaning
** -9 is the "don't know" code (see Section 3.3); values set to -9 are excluded from indicators that require values >= 0
	replace l36 = -9 if l36 != . & l36 > 300   // removing extreme values
	replace l36_BR = -9 if l36_BR != . & l36_BR > 300 // removing extreme values
	
** NOTE(review): a1 is only assigned the codes 7, 18, 81, 90, 100, 107 and 128 in Section 2.2, so a1 == 86 matches no observation unless a1 is populated elsewhere - confirm that the two filters below take effect
	replace o3a = -7 if a1 == 86 & inlist(o1,2,-9) // adding the filter that wasn't implemented during the survey
	replace o3b = -7 if a1 == 86 & inlist(o1,2,-9) // adding the filter that wasn't implemented during the survey
		
** temp_e1 keeps the pre-edit e1 so that the second replace only touches the rows changed by the first
	clonevar temp_e1 = e1
	replace e1 = -9 if e2b == -4 & e1 == . // affecting Madagascar and Timor-Leste, 60 observations total
	replace e2b = . if e2b == -4 & e1 == -9 &  temp_e1 != e1 // affecting Madagascar and Timor-Leste, 60 observations total
	drop temp_e1
	
	replace n2a2 =-9 if n2a2>n2a & n2a2!=. & n2a!=. & n2a2>=0 & n2a >=0 // removing extreme values
	
** zeros in these variables are recoded to -9
	foreach var in n2a n2b n2l n2k g33 k31 c31 c38 c33 {
		replace `var' = -9 if `var'== 0
	}
	
	replace j40 = . if j38 == 1 // fixing Tanzania 46 observations where questions were asked while they were not applicable
	
	replace d40b = -9 if d40b == . & d40a == 0 // 4 observations, 1 in El Salvador, 3 in Slovak Republic
	replace d40b = . if d40b == -9 & d40a > 0 & d40a != .  // 2 observation in El Salvador, added by Nona on May 22, 2025
	
** hard upper bounds: values above the threshold are recoded to -9
	replace k35 = -9 if k35 > 30 & k35 != . // this and the next two lines added after consulting with Kamal, Subika, Suzy
	replace k36 = -9 if k36 > 50 & k36 != .
	replace k40 = -9 if k40 > 50 & k40 != .
	
	replace c9b = -9 if c9b/d2*100 > 100 & c9b >= -9 & c9b != . & d2 >= -9 & d2 != . // implements hard maximum of 100 for losses from electricity
	
	replace c8=. if c8b==-9 & year > 2014 // this just removes don't knows from calculations
	
** data cleaning below is applicable only to older surveys	
	replace e2b = . if year <= 2014 | country == "Bhutan2015"
	replace e1 = . if year <= 2014  | country == "Bhutan2015"
	replace e2b = . if e1 == -9 & e2b < 0 // affecting 2 observations in Malaysia2019
	replace d13 = . if year <= 2014 | country == "Bhutan2015"
	replace l9b = . if inlist(year,2010,2011,2013,2014) | country == "Bhutan2015"
	replace c9b=. if c9a == 0	
	replace c4 = . if c4==0 & year <=2017 // data cleaning for 3 observations
	replace c13 = . if c13==0 & year <=2017 // data cleaning for 1 observation

**# 3. Remove outliers from some variables

**# 3.1 Create ratios (to feed into outlier routine)
	# delimit ;
** n2a2 expressed as a percentage of n2a, coded -9 when either input is negative and both are non-missing ;
		gen n2a2_n2a=n2a2/n2a*100 if n2a2>=0&n2a>=0; replace n2a2_n2a=-9 if (n2a2<0|n2a<0)&(n2a2!=.&n2a!=.);
	
**# 3.2 handle combinations that go into the outlier checks ;
** export clearance time in days: d33a plus d33b divided by 24 ;
		clonevar d33a_d33b_days = d33a if d33a>=0; replace d33a_d33b_days = d33a_d33b_days + d33b/24 if d33b >= 0 & d33b !=.;
		replace d33a_d33b_days = 0.5 if d33a_d33b_days == 0 & d33a == 0 & d33b ==-9 ;  // edited by Nona to replace missing to 0.5 on September 9, 2025 per discussions with Nuno, Jorge and Norman; added by Nona on May 22, 2025, handles cases when d33b is don't know which inadvertently counted as 0 instead of missing ; 

** 2025 questionnaire: d42 selects whether only d4a/d4b are used (d42==2) or d43a/d43b are added to them (d42==1) ;
		replace d33a_d33b_days =                  d4a + d4b/24 if d42==2 & d4a>=0 & d4b>=0; // added by Nona on Sept. 3, 2025, handles 2025 questionnaire ;
		replace d33a_d33b_days = d43a + d43b/24 + d4a + d4b/24 if d42==1 & d4a>=0 & d4b>=0 & d43a>=0 & d43b>=0;
		
** import clearance time, built the same way from d40a and d40b ;
		clonevar d40a_d40b = d40a; replace d40a_d40b = d40a_d40b + d40b/24 if d40b >= 0 & d40b !=. & d40a == 0; replace d40a_d40b = 0.5 if d40a == 0 & d40b == -9; // edited by Nona to replace missing to 0.5 on September 9, 2025 per discussions with Jorge; 
		
		replace d40a_d40b =                  d14a + d14b/24 if d44==2 & d14a>=0 & d14b>=0; // added by Nona on Sept. 3, 2025, handles 2025 questionnaire ;
** the d45b test below was d45b>0, which rejected a zero value that the parallel export rule (d43b>=0) accepts - now consistent ;
		replace d40a_d40b = d45a + d45b/24 + d14a + d14b/24 if d44==1 & d14a>=0 & d14b>=0 & d45a>=0 & d45b>=0;	
	
** j35a is used when available, otherwise j35b multiplied by 12 - a non-negative j35 overrides both ;
		clonevar j35a_j35b = j35a; replace j35a_j35b = j35b * 12 if j35a == . & j35b >= 0 & j35b !=.; replace j35a_j35b = -9 if j35a == . & j35b == -9;
		replace j35a_j35b = j35 if j35 >= 0 & j35 !=. ; // added by Nona on October 31, 2024
** c8a plus c8b divided by 60 (presumably hours plus minutes - confirm against the questionnaire) ;
		clonevar c8a_c8b = c8a; replace c8a_c8b = c8a_c8b + c8b/60 if c8b >= 0 & c8b !=.; replace c8a_c8b = 0.5 if c8b == -9; // edited by Nona to replace missing to 0.5 on September 9, 2025 per discussions with Nuno, Jorge and Norman; 
** c9a is used as reported, replaced by c9b as a percentage of d2 when both are available ;
		clonevar c9a_c9b = c9a; replace c9a_c9b = c9b/d2*100 if c9b >=0 & c9b!=. & d2 >=0 & d2 != .; replace c9a_c9b=-9 if c9a_c9b==.&d2==-9;
		replace l38=1 if l38a == 0 & l38b==-9; // edited by Nona to replace missing to 1 on September 9, 2025 per discussions with Nuno, Jorge and Norman; 
	***** end of handling combinations;
	
** added by Nona on May 22, 2025, after discussion with David and Jorge, creating equivalent of d33* with d4* ; 
** d4a_d4b_days: d4 for surveys with no d4a or d4b data at all, otherwise d4a plus d4b divided by 24 ;
		egen ct_d4a = count(d4a), by(country) ;
		egen ct_d4b = count(d4b), by(country)  ;
		clonevar d4a_d4b_days = d4 if ct_d4a == 0 & ct_d4b == 0 ; drop ct_d4a ct_d4b ; // these are surveys without d4a and d4b ;
		replace d4a_d4b_days = d4a if d4a_d4b_days == . & d4a>=0; replace d4a_d4b_days = d4a_d4b_days + d4b/24 if d4b >= 0 & d4b !=.;
		replace d4a_d4b_days = 0.5 if d4a_d4b_days == 0 & d4a == 0 & d4b ==-9 ; // edited by Nona to replace missing to 0.5 on September 9, 2025 per discussions with Nuno, Jorge and Norman ;
		
** non-negative copies used to compare the d4-based and d33-based durations ;
** where the d4-based value exceeds the d33-based value by more than 0.01, both sets of variables are blanked below ;
		clonevar temp_d4a_d4b_days = d4a_d4b_days if d4a_d4b_days>=0;
		clonevar temp_d33a_d33b_days = d33a_d33b_days if d33a_d33b_days>=0;
		replace d4 = . if (temp_d4a_d4b_days - temp_d33a_d33b_days) > 0.01 & temp_d33a_d33b_days != . & temp_d4a_d4b_days  != .  & (d4a != 0 | d33a != 0);	// this is to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		replace d4 = . if (d4 > 1 & d4 != . & d33a == 0 & d33b ==-9);	// this is to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		replace d4 = 1 if d4 == . & d4a == 0 & d4b == -9 ; // edited by Nona on September 9, 2025 to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		foreach var in d4a_d4b_days d4a d4b d33a_d33b_days d33a d33b { ;
			replace `var' = . if (temp_d4a_d4b_days - temp_d33a_d33b_days) > 0.01 & temp_d33a_d33b_days != . & temp_d4a_d4b_days  != .  ;
		} ;
** both helper copies are dropped - the list previously named temp_d4a_d4b_days twice and left temp_d33a_d33b_days in the data ;
		drop temp_d4a_d4b_days temp_d33a_d33b_days;

** same construction for imports: d14a_d14b_days compared against d40a_d40b ;
		egen ct_d14a = count(d14a), by(country) ;
		egen ct_d14b = count(d14b), by(country)  ;
		clonevar d14a_d14b_days = d14 if ct_d14a == 0 & ct_d14b == 0 ; drop ct_d14a ct_d14b ; // these are surveys without d14a and d14b ;
		replace d14a_d14b_days = d14a if d14a_d14b_days == . & d14a>=0; replace d14a_d14b_days = d14a_d14b_days + d14b/24 if d14b >= 0 & d14b !=.;
		replace d14a_d14b_days = 0.5 if d14a_d14b_days == 0 & d14a == 0 & d14b ==-9 ; // edited by Nona to replace missing to 0.5 on September 9, 2025 per discussions with Nuno, Jorge and Norman ;
		
		clonevar temp_d14a_d14b_days = d14a_d14b_days if d14a_d14b_days>=0;
		clonevar temp_d40a_d40b = d40a_d40b if d40a_d40b>=0;
		replace d14 = . if (temp_d14a_d14b_days - temp_d40a_d40b) > 0.01 & temp_d40a_d40b != . & temp_d14a_d14b_days  != .  & (d14a != 0 | d40a != 0);	// this is to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		replace d14 = . if (d14 > 1 & d14 != . & d40a == 0 & d40b ==-9);	// this is to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		replace d14 = 1 if d14 == . & d14a == 0 & d14b == -9 ; // edited by Nona on September 9, 2025 to preserve backward compatibility of 1 counting anything equaling 1 or below ;
		foreach var in d14a_d14b_days d14a d14b d40a_d40b d40a d40b { ;
			replace `var' = . if (temp_d14a_d14b_days - temp_d40a_d40b) > 0.01 & temp_d40a_d40b != . & temp_d14a_d14b_days  != .  ;
		} ;

		drop temp_d14a_d14b_days temp_d40a_d40b;

	
** outlier <varname>: flags values lying more than 3 standard deviations from the country mean of log(x+1) ;
** Only non-negative values enter the log, and mean and sd are computed separately for each value of country ;
** Output: a new variable _log<varname>o with 0 = not flagged, 1 = below the lower bound, 2 = above the upper bound ;
** NOTE(review): with a single non-missing value in a country the sd is presumably missing, and a comparison against missing would flag that value as 1 - confirm this is intended ;
	capture program drop outlier;
	program define outlier;
		args v;
		local flag _log`v'o;
		tempvar lg mu sd;
		gen `lg' = log(`v' + 1) if `v' >= 0;
		gen `flag' = 0;
		egen `mu' = mean(`lg'), by(country);
		egen `sd' = sd(`lg'), by(country);
		quietly replace `flag' = 1 if `lg' < `mu' - 3 * `sd';
		quietly replace `flag' = 2 if `lg' > `mu' + 3 * `sd' & `lg' != .;
	end;
	# delimit cr
	
**# 3.3 put variables through outliers (from the main input-file, take column "Outlier routine applied? Yes/No", take variables that say Yes, remove duplicates, don't forget ratios, always check for duplicates) d14 added by Nona on May 22, 2025
** For each listed variable: keep a copy in <var>_orig (skipped if it already exists), run the outlier routine,
** then recode flagged values (flag 1 or 2) to -9
	local outlier_vars g3 k32 k33 k35 k36 k38 k40 d33a_d33b_days d34 d40a_d40b d41 l35 l36 l38 n2a2_n2a e2b j43 j35a_j35b j33 j39 n11 c4 c7 c8a_c8b  c9a_c9b c37 c13 e31b l1 l5a l5b c8 d14
	foreach var of local outlier_vars {
		display in red "working on outliers of `var'"
		capture generate `var'_orig = `var'
		outlier `var'
		// -9 is "don't know", which affects weights of the remaining entries in the respective variable
		replace `var' = -9 if inlist(_log`var'o, 1, 2)
	}
	
**# 4. handle combinations using a variable from the outlier routine
** e31a_e31b: e31a where reported, otherwise a category derived from e31b
** (1: above 90 up to 100, 2: 50 to 90, 3: 0 to below 50, -9: e31b is -9); defined only when e1 is 1 or 2
	local e1_in_1_2 inlist(e1,1,2)
	clonevar e31a_e31b = e31a if `e1_in_1_2'
	replace e31a_e31b = 1  if e31a == . & e31b > 90 & e31b <= 100 & `e1_in_1_2'
	replace e31a_e31b = 2  if e31a == . & inrange(e31b, 50, 90)   & `e1_in_1_2'
	replace e31a_e31b = 3  if e31a == . & e31b >= 0 & e31b < 50   & `e1_in_1_2'
	replace e31a_e31b = -9 if e31a == . & e31b == -9              & `e1_in_1_2'

**# 5. Calculate firm-level indicators
	# delimit ;

* older indicator that serves as an input to in4 below ;
		gen in4_c = c9a_c9b if c9a_c9b >= 0 & c9a_c9b <= 100 ;

* older indicators now feeding in B-READY ;
* negative codes in l5a l5b l5 are turned into missing before building gend2 ;
		foreach v in l5a l5b l5 { ;
			replace `v' = . if `v' < 0 ;
		} ;

* gend2 is l5a plus l5b as a percentage of l1 for manufacturing, with l5 over l1 as the fallback wherever gend2 is still missing ;
		tempvar l5_sum ;
		egen `l5_sum' = rowtotal(l5a l5b), missing ;
		gen gend2 = (`l5_sum' / l1) * 100 if sector_MS == "Manufacturing" & l1 >= 0 ;
		replace gend2 = . if l5a == . & l5b == . ;
		replace gend2 = (l5 / l1) * 100 if gend2 == . & l1 >= 0 ;
		replace gend2 = . if gend2 > 100 ;
		drop `l5_sum' ;

* g3 is kept when it lies between 0 and 730 ;
		gen bus3 = g3 if inrange(g3, 0, 730) ;

* in3 must be built before in2 because the Congo2009 edit of in2 reads in3 ;
		gen in3 = c8 if c8 >= 0 ;
		replace in3 = 0 if c7 == 0 | c6 == 2 ;

		gen in2 = c7 if c7 >= 0 ;
		replace in2 = 0 if c6 == 2 ;
		replace in2 = . if in2 > 150 & in3 > 24 & country == "Congo2009" ;

* in4 takes in4_c, is zero when c6 is 2 and missing when c6 is missing ;
		gen in4 = in4_c ;
		replace in4 = 0 if c6 == 2 ;
		replace in4 = . if c6 == . ;

* yes or no items are coded 100 for answer 1 and 0 for answer 2 ;
		gen in9 = cond(c10 == 1, 100, 0) if inlist(c10, 1, 2) ;
		gen in1 = c4 if inrange(c4, 0, 730) ;
		gen in5 = c13 if inrange(c13, 0, 730) ;

		gen wk1 = cond(l10 == 1, 100, 0) if inlist(l10, 1, 2) ;

		gen t1 = cond(b8 == 1, 100, 0) if inlist(b8, 1, 2) ;
		
*indicators first published on July 5, 2024;
* Conventions used below ;
*   continuous indicators keep the source value only when it is non-negative ;
*   yes or no items are coded 100 and 0 ;
*   five-point items coded 0 to 4 are rescaled to 0, 25, 50, 75 and 100 ;
* regulation and tax indicators ;
		gen reg9=j43 if j43>=0;
		gen reg10=100 if inlist(l40,1)==1; replace reg10=0 if inlist(l40,2)==1;
		gen reg11=100 if inlist(l41,1)==1; replace reg11=0 if inlist(l41,2)==1;
		gen reg12=0 if g30a==0; replace reg12=25 if g30a==1; replace reg12=50 if g30a==2; replace reg12=75 if g30a==3; replace reg12=100 if g30a==4; // edited for scale on September 18, 2025 ;
		gen tax1=j35a_j35b if j35a_j35b>=0;
		gen tax2=100 if inlist(j36,1,2)==1; replace tax2=0 if inlist(j36,3)==1;
		gen tax3=100 if inlist(j37,1,2)==1; replace tax3=0 if inlist(j37,3)==1;
		gen tax4=j33 if j33>=0;
		gen tax5=j39 if j39>=0;
* NOTE(review) tax6 is set to 0 when j38 equals 1, while Section 2.4 blanks j40 for those same rows and the tax6 label describes a share among firms giving a reason - confirm the intended zero condition ;
		gen tax6=100 if inlist(j40,1,2)==1; replace tax6=0 if j38==1;
		gen tax7 = n2a2_n2a if n2a2_n2a>=0;
		gen tax8=n11 if n11>=0;
* finance indicators ;
		gen fin26=100 if inlist(k17,2,3,4)==1; replace fin26=0 if inlist(k17,1,5,6,7)==1;
		gen fin27=k32 if k32>=0;
		gen fin28=k33 if k33>=0;
		gen fin29=k35 if k35>=0;
		gen fin30=k36 if k36>=0;
		gen fin31=k38 if k38>=0;
		gen fin32=k40 if k40>=0;
		gen fin33=0 if k30==0; replace fin33=25 if k30==1; replace fin33=50 if k30==2; replace fin33=75 if k30==3; replace fin33=100 if k30==4; // edited for scale on September 18, 2025 ;
* infrastructure indicators ;
		gen in18=100 if inlist(c152,1)==1; replace in18=0 if inlist(c152,2)==1;
		gen in19=c162 if c162 >=0;
		gen in20=c172 if c172 >=0;
		gen in21=100 if inlist(c39,1)==1; replace in21=0 if inlist(c39,2)==1;
		gen in22=c37 if c37>=0;
		gen in23=0 if d30a==0; replace in23=25 if d30a==1; replace in23=50 if d30a==2; replace in23=75 if d30a==3; replace in23=100 if d30a==4; // edited for scale on September 18, 2025 ;
		gen in24=100 if inlist(ge3,1)==1; replace in24=0 if inlist(ge3,2)==1;
		gen in25=100 if inlist(ge7,1)==1; replace in25=0 if inlist(ge7,2)==1;
		gen in26=100 if inlist(ge8d,1)==1; replace in26=0 if inlist(ge8d,2)==1;
* gender indicators ;
* gend7 is defined for j42 equal to 1 unless neither b4 nor b7a gives a usable answer, and is 100 when b4 or b7a equals 1 ;
		gen gend7=0 if j42==1 & !( (b4==2 & (b7a == -9|b7a==.)) | ((b4==-9|b4==.) & b7a == 2) | ((b4==-9|b4==.) & (b7a == -9|b7a==.)) ); replace gend7=100 if gend7==0 & (b4==1 | b7a == 1);
* gend8 and gend9 share one formula - gend8 is kept for manufacturing only and gend9 for all other sectors ;
		gen gend8=l12a if l12a >=0 ; replace gend8 = l12a1 / ( (gend2/100) * l1 ) * 100 if l12a1 >=0 & l12a1 !=. & gend2 != . & l1 >= 0 & l1 !=. ; replace gend8 = . if (!inrange(gend8,0,100))|sector_MS!="Manufacturing"; // gend2 being divided by 100 added on December 10, 2025 by Nona, Ziad, Phoebe, per mistake documented in the indicator-description ;
		gen gend9=l12a if l12a >=0 ; replace gend9 = l12a1 / ( (gend2/100) * l1 ) * 100 if l12a1 >=0 & l12a1 !=. & gend2 != . & l1 >= 0 & l1 !=. ; replace gend9 = . if (!inrange(gend9,0,100))|sector_MS=="Manufacturing"; // gend2 being divided by 100 added on December 10, 2025 by Nona, Ziad, Phoebe, per mistake documented in the indicator-description ;
* trade indicators ;
* tr18_u equals tr18 with zeros filled in for rows coded -7 in d4a or d4, only in surveys that have at least one tr18 value ;
		gen tr18=d33a_d33b_days if d33a_d33b_days>=0;
		egen temp=count(tr18), by(country);
		gen tr18_u=tr18; replace tr18_u = 0 if tr18 == . & inlist(d1a1a,1,3) & (d4a == -7 | d4 == -7) & temp >0 ; drop temp; // added by Nona on Sept 18, 2025 ;	
	
		gen tr19=100 if inlist(d35,1)==1; replace tr19=0 if inlist(d35,2)==1;
		gen tr20=d34 if d34>=0;
		
* tr21 is only computed for surveys from 2016 onwards in which every sector_MS group has at least one d12b value ;
		egen temp_d12 = count(d12b), by(sector_MS country); egen temp_d12_count = min(temp_d12), by(country); 
		gen  tr21=100 if (d12b>0 & d12b~=.) & temp_d12_count > 0 & year >= 2016 ; replace tr21=0 if d12b==0 & temp_d12_count > 0  & year >= 2016; drop temp_d12 temp_d12_count ; /*** edited by Nona on July 17, 2025, when mistake was detected, original code was: gen tr21=100 if d13==1 replace tr21=0 if d13==2***/;
	
		gen tr22=d12b if d12b >=0;
		gen tr23=d14 if d14 >=0; // changed to d14 from d14a if by Nona on May 22, 2025; 
		gen tr24=d40a_d40b if d40a_d40b>=0;
		egen temp=count(tr24), by(country);
		gen tr24_u=tr24; replace tr24_u = 0 if tr24_u == . & (d14 == -7 | d14a == -7) & temp >0 ; drop temp ; // added by Nona on Sept 18, 2025, condition tr24_u == .  to override one observation where the follow-up is inconsistent with the baseline, taking the follow-up  ;
		
		gen tr25=d41 if d41>=0;
		gen tr26=0 if d30b==0; replace tr26=25 if d30b==1; replace tr26=50 if d30b==2; replace tr26=75 if d30b==3; replace tr26=100 if d30b==4; // edited for scale on September 18, 2025 ;
* competition indicators - comp1 to comp4 exclude rows where e1 equals 3 ;
		gen comp1 = 0 if e31a_e31b == 1 & e1 != 3; replace comp1 = 50 if e31a_e31b == 2 & e1 != 3; replace comp1 = 100 if e31a_e31b == 3 & e1 != 3;
		replace comp1 = 0 if e312 == 1 & e1 != 3; replace comp1 = 50 if e312 == 2 & e1 != 3; replace comp1 = 100 if e312 == 3 & e1 != 3 ; // added by Nona on November 1, 2024, per suggestion from Antoni Albert Nogues Comas; 
* in comp2 to comp4 the code -4 in e2b is grouped with the largest category ;
		gen comp2=100 if (e2b<2&e2b>=0 & e1 != 3); replace comp2=0 if ((e2b>=2&e2b!=.)|e2b==-4) & e1 != 3;
		gen comp3=0 if ((e2b>=0&e2b!=.)|e2b==-4) & e1 != 3; replace comp3=100 if (e2b>=2&e2b<=5 & e1 != 3);
		gen comp4=100 if ((e2b>5&e2b!=.)|e2b==-4) & e1 != 3; replace comp4=0 if (e2b<=5&e2b>=0 & e1 != 3);
		gen comp5=0 if inlist(e33,1)==1; replace comp5=100 if inlist(e33,2)==1;
		gen comp6=100 if inlist(e33,-7)==1; replace comp6=0 if inlist(e33,1,2,-6)==1;
		gen comp7= 0 if e32 == 2; replace comp7 = 50 if e32 == 3; replace comp7 = 100 if e32 == 1;
		gen comp8=100 if inlist(c42,4)==1; replace comp8=50 if inlist(c42,3,2)==1; replace comp8=0 if inlist(c42,1)==1;
		gen comp9=100 if inlist(j41,1)==1; replace comp9=2/3*100 if inlist(j41,2)==1; replace comp9=1/3*100 if inlist(j41,3)==1; replace comp9=0 if inlist(j41,4)==1; // edited for scale on September 18, 2025 ;
* dispute resolution indicators ;
		gen disp1=100 if inlist(o1,1)==1; replace disp1=0 if inlist(o1,2)==1;
		gen disp2=100 if inlist(o2,1)==1; replace disp2=0 if inlist(o2,2)==1;
		gen disp3=100 if inlist(j31,1,2)==1; replace disp3=0 if inlist(j31,3,4)==1; // edited for scale on September 18, 2025 ;
		gen disp4=100 if inlist(o3a,1,2)==1; replace disp4=0 if inlist(o3a,3,4)==1; // edited for scale on September 18, 2025 ;
		gen disp5=100 if inlist(o3b,1,2)==1; replace disp5=0 if inlist(o3b,3,4)==1; // edited for scale on September 18, 2025 ;
		gen disp6=0 if h30==0; replace disp6=25 if h30==1; replace disp6=50 if h30==2; replace disp6=75 if h30==3; replace disp6=100 if h30==4; // edited for scale on September 18, 2025 ;
* workforce indicators ;
		gen wk20=100 if b3a==1; replace wk20=0 if b3a==2;
		gen wk23=l9b if l9b !=.; replace wk23=. if l9b== -9;
		gen wk24=l35 if l35>=0;
		gen wk25=l36 if l36>=0;
		gen wk26=100 if inlist(l37,1)==1; replace wk26=0 if inlist(l37,2)==1;
		gen wk27=l38 if l38>=0;
		gen wk28=0 if l30a==0; replace wk28=25 if l30a==1; replace wk28=50 if l30a==2; replace wk28=75 if l30a==3; replace wk28=100 if l30a==4; // edited for scale on September 18, 2025 ;
* innovation indicators - t11 and the _ml variants are restricted to size 2 or 3 ;
		gen t11=100 if (inlist(h1,1)==1& inlist(h5,1)==1&inlist(h8,1)==1&inlist(size,2,3)==1); replace t11=0 if (inlist(h1,2)==1 | inlist(h5,2)==1| inlist(h8,2)==1)&inlist(size,2,3)==1;

* wk22 is l1 as a percentage of size_num ;
		gen wk21 = size_num;
		gen wk22= l1/wk21*100 if l1>=0 & l1 !=. & wk21 != .;
		
		gen  t7=100 if h1==1; replace t7=0 if h1==2; /** added by Nona on May 1, 2017 **/;
		gen t7_ml=t7 if inlist(size,2,3); // added by Nona on Sept. 18, 2025;
		gen  t9=100 if h5==1; replace t9=0 if h5==2; /** added by Nona on May 1, 2017 **/;
		gen t9_ml=t9 if inlist(size,2,3); // added by Nona on Sept. 18, 2025;
	# delimit cr

** Keep and order the indicators created above
** The same list drives both commands, so the retained variables and their order cannot drift apart
	local final_vars idstd country size_num size ///
		bus3 reg9 reg10 reg11 reg12 ///
		tax1 tax2 tax3 tax4 tax5 tax6 tax7 tax8 ///
		fin26 fin27 fin28 fin29 fin30 fin31 fin32 fin33 ///
		in2 in3 in4 in9 in1 in18 in19 in20 in5 in21 in22 in23 in24 in25 in26 ///
		gend7 gend8 gend9 ///
		tr18 tr18_u tr19 tr20 tr21 tr22 tr23 tr24 tr24_u tr25 tr26 ///
		comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 ///
		disp1 disp2 disp3 disp4 disp5 disp6 ///
		wk1 wk20 wk21 wk22 wk23 wk24 wk25 wk26 wk27 wk28 ///
		t11 t1 t7_ml t9_ml ///
		wmedian wmedian_BR ///
		wt_6_followups wt_5a_followups wt_5b_followups wt_5c_followups wt_4a_followups wt_4b_followups
	keep  `final_vars'
	order `final_vars'
** label variables
** label_indicators attaches a descriptive label to every indicator created in Section 5.
** It takes no arguments and expects all of the indicator variables to exist in the data in memory.
** Labels prefixed with [B-READY] mark indicators used in the B-READY report.
** Every label string is now closed with a double quote, and trailing blanks inside the labels were removed.
** NOTE(review): Stata documents an 80-character limit for variable labels, so the longer labels below are presumably truncated - confirm.
	capture program drop label_indicators
	program define label_indicators	
		lab var bus3 "[B-READY] Days to obtain a construction-related permit [median]"
		lab var reg9 "[B-READY] Days to receive payment under government contract"
		lab var reg10 "[B-READY] Percent of firms visited or inspected for health or safety"
		lab var reg11 "[B-READY] Percent of firms with a report issued for health or safety inspection"
		lab var reg12 "[B-READY] Perceptions index of access to land as a constraint"
		lab var tax1 "[B-READY] Hours spent on tax compliance annually [median]"
		lab var tax2 "[B-READY] Percent of firms filing taxes electronically"
		lab var tax3 "[B-READY] Percent of firms paying taxes electronically"
		lab var tax4 "[B-READY] Weeks until the final tax audit report [median]"
		lab var tax5 "[B-READY] Weeks to receive VAT refund [median]"
		lab var tax6 "[B-READY] Percent of firms reporting too long or complicated refund process as the main reason for not applying for a VAT refund, among those providing a reason other than no need"
		lab var tax7 "[B-READY] Reported share of social security and employment-based taxes in annual cost of labor"
		lab var tax8 "[B-READY] Reported effective income-based tax rate"
		lab var fin26 "[B-READY] Percent of firms reporting unfavorable rates, collateral, or procedures as main reason for not applying for loans"
		lab var fin27 "[B-READY] Days to receive a decision on loan application"
		lab var fin28 "[B-READY] Proportion of sales paid by customers electronically"
		lab var fin29 "[B-READY] Days to receive the main type of electronic payment"
		lab var fin30 "[B-READY] Cost to receive main type of electronic payment (% of transaction)"
		lab var fin31 "[B-READY] Proportion of payments made electronically"
		lab var fin32 "[B-READY] Cost to make main type of electronic payment (% of transaction)"
		lab var fin33 "[B-READY] Perceptions index of access to finance as a constraint"
		lab var in2 "[B-READY] Number of electrical outages in a typical month"
		lab var in3 "[B-READY] Duration, in hours, of a typical electrical outage [median]"
		lab var in4 "[B-READY] Losses due to electrical outages (% of annual sales) [median]"
		lab var in9 "[B-READY] Percent of firms owning or sharing  a generator"
		lab var in1 "[B-READY] Days to obtain an electrical connection, upon application [median]"
		lab var in18 "[B-READY] Percent of firms experiencing water insufficiencies"
		lab var in19 "Number of water insufficiencies in a typical month"
		lab var in20 "Duration, in hours, of a typical water shortage"
		lab var in5 "[B-READY] Days to obtain a water connection [median]"
		lab var in21 "[B-READY] Percent of firms experiencing internet disruptions"
		lab var in22 "[B-READY] Days to obtain internet connection [median]"
		lab var in23 "[B-READY] Perceptions index of transportation as a constraint"
		lab var in24 "Percent of firms experiencing damage of physical assets due to extreme weather"
		lab var in25 "Percent of firms monitoring own CO2 emissions over last 3 years"
		lab var in26 "Percent of firms adopting energy management measures to reduce emissions over last 3 years"
		lab var gend7 "[B-READY] Percent of firms owned or managed by women among those that held a government contract in last 3 years"
		lab var gend8 "Proportion of women workers offered formal training over last fiscal year (%)*"
		lab var gend9 "Proportion of women workers offered formal training over last fiscal year (%, only service sector)"
		lab var tr18_u "Days for all exported goods to clear all border control agencies"
		lab var tr18 "[B-READY] Days for exported goods to clear all border control agencies [median]"
		lab var tr19 "[B-READY] Percent of firms that export at least some products via parcel service"
		lab var tr20 "[B-READY] Typical costs to comply with all export requirements (% of value of goods exported)"
		lab var tr21 "Percent of firms using material inputs and/or supplies of foreign origin"
		lab var tr22 "Proportion of total inputs that are of foreign origin (%)"
		lab var tr23 "Days to clear imports from customs"
		lab var tr24 "[B-READY] Days for imported goods to clear all border control agencies [median]"
		lab var tr24_u "Days for all imported goods to clear all border control agencies"
		lab var tr25 "[B-READY] Typical costs to comply with all import requirements (% of value of goods imported)"
		lab var tr26 "[B-READY] Perceptions index of customs and trade regulations as a constraint"
		lab var comp1 "[B-READY] Index of market share of the largest competitor (excluding firms whose main market is international)"
		lab var comp2 "[B-READY] Percent of firms reporting less than two competitors in their main product's main market (excluding firms whose main market is international)"
		lab var comp3 "[B-READY] Percent of firms reporting between two and five (inclusive) competitors in their main product's main market (excluding firms whose main market is international)"
		lab var comp4 "[B-READY] Percent of firms reporting more than five competitors in their main product's main market (excluding firms whose main market is international)"
		lab var comp5 "[B-READY] Percent of firms that cannot increase prices more than competitors without losing customers"
		lab var comp6 "[B-READY] Percent of firms reporting their prices to be regulated"
		lab var comp7 "[B-READY] Index of change of level of competition over last year"
		lab var comp8 "[B-READY] Index of difficulty to switch internet providers"
		lab var comp9 "[B-READY] Perceptions of the degree of difficulty to comply with government contract tender requirements"
		lab var disp1 "Percent of firms that had any commercial dispute"
		lab var disp2 "Percent of firms that use courts, arbitration, mediation, or conciliation to resolve or attempt to resolve its commercial disputes"
		lab var disp3 "[B-READY] Perceptions of courts being independent and impartial in resolving commercial disputes"
		lab var disp4 "[B-READY] Perceptions of arbitration being a reliable alternative to courts for resolving commercial disputes"
		lab var disp5 "[B-READY] Perceptions of mediation being a reliable alternative to courts for resolving commercial disputes"
		lab var disp6 "[B-READY] Perceptions index of courts as a constraint"
		lab var wk1 "[B-READY] Percent of firms offering formal training over last fiscal year"
		lab var wk20 "Percent of firms where the largest owner is also the top manager"
		lab var wk21 "Number of permanent full-time equivalent workers"
		lab var wk22 "Proportion of permanent workers, out of all permanent full-time equivalent workers"
		lab var wk23 "Proportion of permanent full time workers that completed high school"
		lab var wk24 "[B-READY] Weeks to dismiss a full-time permanent worker"
		lab var wk25 "[B-READY] Weeks paid in severance"
		lab var wk26 "[B-READY] Percent of firms involved in labor dispute over last 3 years"
		lab var wk27 "[B-READY] Months to resolve labor dispute"
		lab var wk28 "[B-READY] Perceptions index of labor regulations as a constraint"
		lab var t11 "[B-READY] Percent of firms that introduced a new product/service and process over last 3 years, and spent on R&D over last fiscal year (excluding small firms)"
		lab var t1 "[B-READY] Percent of firms with an internationally-recognized quality certification"
		lab var t7_ml "Percent of medium or large firms that introduced a new product/service over last 3 years"
		lab var t9_ml "Percent of medium or large firms that introduced a process innovation over last 3 years"
	end
	
	* apply the indicator variable labels by running the label_indicators program
	label_indicators
	
	* describe each follow-up sampling weight; the label text names the follow-up surveys it covers
	lab var wt_4a_followups "Sampling weight, vars in 4 follow-ups in 2023 rollout: BGD IRQ MDG TLS"
	lab var wt_4b_followups "Sampling weight, vars in 4 follow-ups in 2023 rollout: BGD IRQ MDG IDN"
	lab var wt_5a_followups "Sampling weight, vars in 5 follow-ups in 2023 rollout: BGD IRQ MDG PER IDN"
	lab var wt_5b_followups "Sampling weight, vars in 5 follow-ups in 2023 rollout: BGD IRQ MDG PER TLS"
	lab var wt_5c_followups "Sampling weight, vars in 5 follow-ups in 2023 rollout: BGD IRQ MDG TLS IDN"
	lab var wt_6_followups "Sampling weight, vars in 6 follow-ups in 2023 rollout: BGD IRQ MDG PER TLS IDN"
	
	* shorten the names of the two general weight variables in a single group rename
	rename (wmedian wmedian_BR) (wt wt_BR)
	
	* write the firm-level indicator file, stamped with the run date held in global date
	save "WBES Indicators_firm-level_$date.dta", replace

**# 6. Calculate economy-level indicators	 

** define list of indicators and corresponding weights
** Naming convention used below: each group is identified either by "list" or by the name of a
** follow-up weight variable. The global named after the group holds the indicators summarised
** as means; the global with the suffix _medians holds the indicators summarised as medians.
	#delimit;
	global list " reg12 fin26 fin33 in2 in9 in19 in20 in23 gend8 gend9 tr21 tr22 tr23 tr26 disp6 wk1 wk20 wk21 wk22 wk23 wk28 t11 t1 t7_ml t9_ml";
	global list_medians " bus3 in3 in4 in1 in5";

	global wt_4a_followups = " tax2 tax3 in21";
	global wt_4a_followups_medians = " tax1";

	global wt_4b_followups = " in24 in25 in26";
	global wt_4b_followups_medians = "";

	global wt_5a_followups = " tax6";
	global wt_5a_followups_medians = " tax5";

	/* exactly one leading blank: every blank is turned into a prefix further down, so a doubled
	   blank would produce a stray bare prefix token in the prefixed list */
	global wt_5b_followups = " in18 comp2 comp3 comp4 comp9";
	global wt_5b_followups_medians = " tax4";

	global wt_5c_followups = " tax7 disp1 disp2 disp4 disp5";
	global wt_5c_followups_medians  = " in22";

	global wt_6_followups = " reg9 reg10 reg11 tax8 fin27 fin28 fin29 fin30 fin31 fin32 gend7 tr19 tr20 tr25 comp1 comp5 comp6 comp7 comp8 disp3 wk24 wk25 wk26 wk27 tr18_u tr24_u";
	global wt_6_followups_medians = " tr18 tr24" ;

	/* every indicator from every group; wt_4b_followups has no median indicators to add */
	global full_list = " $list $list_medians $wt_4a_followups $wt_4a_followups_medians $wt_4b_followups $wt_5a_followups $wt_5a_followups_medians $wt_5b_followups $wt_5b_followups_medians $wt_5c_followups $wt_5c_followups_medians $wt_6_followups $wt_6_followups_medians " ;

	/* column order of the economy-level output file */
	global order = "bus3 reg9 reg10 reg11 reg12 tax1 tax2 tax3 tax4 tax5 tax6 tax7 tax8 fin26 fin27 fin28 fin29 fin30 fin31 fin32 fin33 in2 in3 in4 in9 in1 in18 in19 in20 in5 in21 in22 in23 in24 in25 in26 gend7 gend8 gend9 tr18 tr18_u tr19 tr20 tr21 tr22 tr23 tr24 tr24_u tr25 tr26 comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 disp1 disp2 disp3 disp4 disp5 disp6 wk1 wk20 wk21 wk22 wk23 wk24 wk25 wk26 wk27 wk28 t11 t1 t7_ml t9_ml";

	/* Prefixed copies of each group's lists: the means_ globals name the mean_ clones and the
	   p50_ globals name the p50_ clones of the indicators. The median lists previously received
	   the mean_ prefix as well, which pointed them at the wrong clones. */
	foreach item in list wt_4a_followups wt_4b_followups wt_5a_followups wt_5b_followups wt_5c_followups wt_6_followups { ;
		global means_`item' = subinstr("${`item'}"," "," mean_",.) ;
		global p50_`item' = subinstr("${`item'_medians}"," "," p50_",.) ;
	} ;
	
	# delimit cr
	
	* for every indicator, create three helper variables:
	*   nn_    equals 1 where the indicator differs from system missing, missing otherwise
	*   mean_  and  p50_  are clones of the indicator (same values, type, format and labels)
	foreach ind of global full_list {
		generate nn_`ind' = cond(`ind' != ., 1, .)
		clonevar mean_`ind' = `ind'
		clonevar p50_`ind' = `ind'
	}
	
*** country level estimates of means or medians, corresponding to the indicator display on the WBES website
*** Each group of indicators is collapsed to one row per country with its own weight; the
*** per-group results are accumulated in one temporary file by merging on country.
	tempfile temp
	foreach item in list wt_4a_followups wt_4b_followups wt_5a_followups wt_5b_followups wt_5c_followups wt_6_followups {
		* the main group is weighted by wt; every other group is named after its own weight variable
		if "`item'" == "list" {
			local wt = "wt"
		}
		else {
			local wt = "`item'"
		}
		
		* means are always requested; medians only when the group has median indicators,
		* because collapse rejects a (p50) statistic that is not followed by a variable
		local stats "(mean) ${`item'}"
		if trim("${`item'_medians}") != "" {
			local stats "`stats' (p50) ${`item'_medians}"
		}
		* the counts of available observations (nn_ variables) are summed once, in the main pass
		if "`item'" == "list" {
			local stats "`stats' (rawsum) nn_*"
		}
		
		preserve
			collapse `stats' [pweight=`wt'], by(country)
			* the main pass creates the accumulator file; later passes are merged into it
			if "`item'" != "list" {
				merge 1:1 country using `temp', nogen
			}
			save `temp', replace
		restore
	}

**# 6.1 Clean up and export in Stata .dta format
	use `temp', clear
	
	* suppress estimates that rest on too few observations: 5 or fewer for any indicator,
	* and fewer than 30 for perf1, perf2 and perf3
	foreach ind of global full_list {
		replace `ind' = . if nn_`ind' <= 5
		if inlist("`ind'", "perf1", "perf2", "perf3") {
			replace `ind' = . if nn_`ind' < 30
		}
	}
	
	* the observation counts were only needed for the suppression step
	drop nn_*
	
	* re-attach the indicator labels and put the columns in the order held in global order
	label_indicators
	order country $order
	
	* to keep just the latest WBES, you can use the following command:
	* keep if tax1 != .
	
	* write the economy-level indicator file, stamped with the run date held in global date
	save "WBES Indicators_economy-level_$date.dta", replace